import requests
import re


def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the resource to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded with ``bytes.decode()``'s default codec
        (UTF-8).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    # A browser-like User-Agent is sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62"}

    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error page instead of handing its body to the parsers.
    response.raise_for_status()
    return response.content.decode()


def get_list(key_id, html):
    """Run the regular expression registered for *key_id* against *html*.

    Args:
        key_id: One of ``'key'``, ``'value'``, ``'name'``, ``'province'``,
            ``'type'`` or ``'score'``.
        html: Text to search.

    Returns:
        For ``'key'`` and ``'value'``: the first ``re.Match`` (or ``None``
        when nothing matches). For every other id: an iterator over all
        matches. In both cases the interesting text is in ``group(1)``.

    Raises:
        KeyError: If *key_id* is not one of the known ids.
    """
    # Raw strings keep the regex escapes (``\(``, ``\)``) from being read as
    # (invalid) string escape sequences.
    patterns = {
        'key': r'function(.*?)\{',
        'value': r'mutations:.*?\}\}\((.*?)\)\)\);',
        'name': r'univNameCn:"(.*?)"',
        'province': r'province:(.*?),',
        'type': r'univCategory:(.*?),',
        'score': r'score:(.*?),',
    }
    # re.S lets ``.`` span newlines.
    com = re.compile(patterns[key_id], re.S)
    if key_id in ('key', 'value'):
        # These two are looked up once per document: only the first match.
        return com.search(html)
    return com.finditer(html)


def get_params_dict(key_list, value_list):
    """Pair parameter names with the argument values they are called with.

    Args:
        key_list: Match whose ``group(1)`` is a parenthesised, comma-separated
            parameter list such as ``"(a,b,c)"``.
        value_list: Match whose ``group(1)`` is the comma-separated argument
            list, e.g. ``'"x","2021,2020",3'``.

    Returns:
        A dict mapping each parameter name to its argument text. Surrounding
        double quotes are removed from string arguments; escape sequences
        inside them are left untouched.

    Raises:
        ValueError: If either match is ``None`` (pattern not found).
    """
    if key_list is None or value_list is None:
        raise ValueError("parameter list or argument list not found in payload")

    # Drop the enclosing parentheses before splitting the names.
    names = key_list.group(1)[1:-1].split(',')

    # Tokenise the arguments so that commas inside a double-quoted string
    # (e.g. "2021,2020") do not split it; a plain ``split(',')`` would cut
    # such a value in two and shift every later value onto the wrong name.
    tokens = re.findall(r'"(?:[^"\\]|\\.)*"|[^,]+', value_list.group(1))
    values = [
        tok[1:-1] if len(tok) >= 2 and tok[0] == tok[-1] == '"' else tok
        for tok in tokens
    ]
    return dict(zip(names, values))


def get_info_list(params_dict, list_info):
    """Resolve each captured value, substituting parameter references.

    Args:
        params_dict: Mapping from parameter name to its actual value.
        list_info: Iterable of match objects; ``group(1)`` of each is either a
            literal value or the name of a parameter in *params_dict*.

    Returns:
        A list with one entry per match: the mapped value when the captured
        text is a key of *params_dict*, otherwise the captured text itself.
    """
    resolved = []
    for match in list_info:
        info = match.group(1)
        # A direct lookup covers every identifier shape (including names
        # containing digits, ``_`` or ``$``), tolerates empty captures, and
        # passes through literals that are not parameter names.
        resolved.append(params_dict.get(info, info))
    return resolved


def save_info(info_lists, key_ids):
    """Turn parallel column lists into a list of per-row dictionaries.

    Args:
        info_lists: Sequence of columns; ``info_lists[k]`` holds the values
            for field ``key_ids[k]``.
        key_ids: Field names, in the same order as *info_lists*.

    Returns:
        One dict per row, mapping each field name to that row's value. Rows
        stop at the shortest column.
    """
    # Pair names with values by position. Looking the position up by value
    # (``row.index(value)``) picks the wrong field when a row contains the
    # same value twice.
    return [dict(zip(key_ids, row)) for row in zip(*info_lists)]


def main():
    """Fetch the ranking payload, extract the fields and print the records."""
    payload_url = "https://www.shanghairanking.cn/_nuxt/static/1636358336/rankings/bcur/2021/payload.js"
    payload = get_html(payload_url)

    # Build the parameter-name -> value table from the wrapper function's
    # signature and the arguments it is invoked with.
    signature_match = get_list('key', payload)
    arguments_match = get_list('value', payload)
    lookup = get_params_dict(signature_match, arguments_match)

    key_ids = ['name', 'province', 'type', 'score']
    # One resolved column per field, in the same order as key_ids.
    columns = [
        get_info_list(lookup, get_list(field, payload))
        for field in key_ids
    ]

    records = save_info(columns, key_ids)
    print(records)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
